# Load the summary table and apply the three row filters used by the rest of
# the script. All relative paths below resolve against this working directory.
setwd("E:\\5hmc_file\\2_5hmc_yjp_bam\\ASM")
file <- read.csv(
  "./20201112做汇总表/all.FDR.sig.at.least.one.csv",
  header = TRUE
)

# Sample-pair labels. Not referenced anywhere in the visible part of this
# script; kept because later code may rely on it.
group1 <- c(
  "X2B_X1T", "M8_M7", "M6_M5", "M2_M1", "M48_M47", "M50_M49", "M28_M27",
  "M30_M29", "M26_M25", "M35_M36", "M18_M17", "M20_M19", "M22_M21", "M40_M39"
)

# which() keeps only rows where the condition is TRUE. Plain logical indexing
# would turn every row whose condition evaluates to NA into an all-NA row.
file <- file[which(!(file$num1 == 2 & file$twins == 1)), ]
file <- file[which(file$num1 > 1), ]
file <- file[which(file$pattern1.DC != ""), ] # only sig in DC
# Collapse each colon-separated pattern string to its distinct tokens.
# For every group (DC, CC, HC) two columns are appended, in this order:
#   pattern.nodupl.<grp>      distinct tokens of pattern1.<grp>, joined by ":"
#   pattern.nodupl.num.<grp>  number of distinct tokens
# The whole column is processed at once instead of assigning cell by cell,
# which also makes the step a no-op-safe operation on a zero-row table.
for (grp in c("DC", "CC", "HC")) {
  tokens <- lapply(strsplit(file[[paste0("pattern1.", grp)]], ":"), unique)
  file[[paste0("pattern.nodupl.", grp)]] <-
    vapply(tokens, paste, character(1), collapse = ":")
  file[[paste0("pattern.nodupl.num.", grp)]] <- lengths(tokens)
}
#file$pattern.direction.DC="nosame"
#file[file$pattern.nodupl.DC=="normal_up-tumor_up"|file$pattern.nodupl.DC=="normal_down-tumor_down",]$pattern.direction.DC="same"

# Frequency table of pattern labels across several columns.
#
# df        data frame to tabulate
# col_regex regular expression selecting columns by name
# n_cols    number of matched columns to use (the first n_cols matches)
#
# Returns a data frame with a "Var" column (the label) and one count column per
# selected source column, named after that source column.
# NOTE(review): merge() is an inner join here, so a label that is absent from
# any one of the columns is dropped from the result - confirm this is intended.
tabulate_pattern_columns <- function(df, col_regex, n_cols) {
  hits <- grep(col_regex, names(df))
  if (length(hits) < n_cols) {
    stop(
      sprintf(
        "expected at least %d columns matching '%s', found %d",
        n_cols, col_regex, length(hits)
      ),
      call. = FALSE
    )
  }
  per_column <- lapply(hits[seq_len(n_cols)], function(k) {
    counts <- data.frame(table(df[, k]))
    names(counts) <- c("Var", names(df)[k])
    counts
  })
  Reduce(function(acc, nxt) merge(acc, nxt, by = "Var"), per_column)
}

# DC uses the first six matching columns; CC and HC use the first four.
rt <- tabulate_pattern_columns(file, "pattern.DC", 6)
write.csv(rt, "./20201117/DC.statis.csv", quote = FALSE, row.names = FALSE)

rt <- tabulate_pattern_columns(file, "pattern.CC", 4)
write.csv(rt, "./20201117/CC.statis.csv", quote = FALSE, row.names = FALSE)

rt <- tabulate_pattern_columns(file, "pattern.HC", 4)
write.csv(rt, "./20201117/HC.statis.csv", quote = FALSE, row.names = FALSE)

# For each group, keep the rows that have exactly one distinct pattern in that
# group, count how often each pattern occurs, and write the counts to CSV.
union_outputs <- c(
  DC = "./20201117/DC.union.statis.csv",
  CC = "./20201117/CC.union.statis.csv",
  HC = "./20201117/HC.union.statis.csv"
)
for (grp in names(union_outputs)) {
  n_distinct_col <- paste0("pattern.nodupl.num.", grp)
  pattern_col <- paste0("pattern.nodupl.", grp)
  # only one kind of pattern in this group
  file1 <- file[file[[n_distinct_col]] == 1, ]
  test <- data.frame(table(file1[[pattern_col]]))
  write.csv(test, union_outputs[[grp]], quote = FALSE, row.names = FALSE)
}

# Recode the eight directional DC pattern labels into five coarser classes and
# store the result in pattern.nodupl.DC.8to5.
# The replacements are applied with gsub() in the order listed, token by token,
# and the recoded tokens are re-joined with ":". Recoded tokens are not
# de-duplicated here, so e.g. two labels that both map to "one_is_down" yield
# "one_is_down:one_is_down".
dc_recode <- c(
  "normal_down-tumor_nosig" = "one_is_down",
  "normal_down-tumor_up"    = "onedown_oneup",
  "normal_nosig-tumor_down" = "one_is_down",
  "normal_nosig-tumor_up"   = "one_is_up",
  "normal_up-tumor_down"    = "onedown_oneup",
  "normal_up-tumor_nosig"   = "one_is_up",
  "normal_down-tumor_down"  = "normal_down_tumor_down",
  "normal_up-tumor_up"      = "normal_up_tumor_up"
)

file2 <- file
# Whole-column pass: avoids per-cell data-frame assignment and works on a
# zero-row table.
file2$pattern.nodupl.DC.8to5 <- vapply(
  strsplit(file2$pattern.nodupl.DC, ":"),
  function(tokens) {
    tokens <- unique(tokens)
    for (from in names(dc_recode)) {
      tokens <- gsub(from, dc_recode[[from]], tokens)
    }
    paste(tokens, collapse = ":")
  },
  character(1)
)
# Combine the recoded DC pattern with the CC and HC patterns into one
# colon-separated summary per row, then reduce it to its distinct tokens.
file22 <- file2

# Empty strings become NA so that unite(na.rm = TRUE) leaves them out.
# Indexing the column vector (rather than `df[rows, ]$col <- NA`) is required:
# the row-subset form raises "replacement has 1 row, data has 0" whenever no
# row matches, which is the normal case for the DC column because rows with an
# empty DC pattern were filtered out earlier.
for (col in c("pattern.nodupl.DC.8to5", "pattern.nodupl.CC",
              "pattern.nodupl.HC")) {
  file22[[col]][file22[[col]] %in% ""] <- NA
}

file22 <- tidyr::unite(
  file22, pattern.summary,
  pattern.nodupl.DC.8to5, pattern.nodupl.CC, pattern.nodupl.HC,
  sep = ":", na.rm = TRUE, remove = FALSE
)

# pattern.nodupl.summary: distinct tokens of pattern.summary joined by ":";
# pattern.nodupl.num.summary: how many distinct tokens there are.
summary_tokens <- lapply(strsplit(file22$pattern.summary, ":"), unique)
file22$pattern.nodupl.summary <-
  vapply(summary_tokens, paste, character(1), collapse = ":")
file22$pattern.nodupl.num.summary <- lengths(summary_tokens)

# Rows whose combined DC/CC/HC summary holds exactly one distinct pattern;
# the per-pattern counts of those rows are written to CSV.
has_single_summary <- file22$pattern.nodupl.num.summary == 1
file1 <- file22[has_single_summary, ]
test <- data.frame(table(file1$pattern.nodupl.summary))
write.csv(
  test,
  "./20201117/only.one.pattern.in.all.sample.union.statis.csv",
  quote = FALSE,
  row.names = FALSE
)

# unitIDs that are up/up or down/down across the combined summary (file1).
all.upup <- file1$unitID[file1$pattern.nodupl.summary == "normal_up_tumor_up"]
all.downdown <-
  file1$unitID[file1$pattern.nodupl.summary == "normal_down_tumor_down"]

# Same selection within each group, restricted to rows that have a single
# distinct pattern in that group.
# NOTE(review): DC is matched against the hyphenated labels
# ("normal_up-tumor_up") because `file` holds the un-recoded DC patterns, while
# CC and HC are matched against the underscore form - confirm that this is how
# the CC/HC labels are actually spelled in the input.
DC <- file[file$pattern.nodupl.num.DC == 1, ]
DC.upup <- DC$unitID[DC$pattern.nodupl.DC == "normal_up-tumor_up"]
DC.downdown <- DC$unitID[DC$pattern.nodupl.DC == "normal_down-tumor_down"]

CC <- file[file$pattern.nodupl.num.CC == 1, ]
CC.upup <- CC$unitID[CC$pattern.nodupl.CC == "normal_up_tumor_up"]
CC.downdown <- CC$unitID[CC$pattern.nodupl.CC == "normal_down_tumor_down"]

HC <- file[file$pattern.nodupl.num.HC == 1, ]
HC.upup <- HC$unitID[HC$pattern.nodupl.HC == "normal_up_tumor_up"]
HC.downdown <- HC$unitID[HC$pattern.nodupl.HC == "normal_down_tumor_down"]

# Overlap sizes between the overall sets and each group's sets. These are bare
# top-level expressions, so the numbers appear only where R auto-prints.
length(intersect(x = all.upup, y = DC.upup))
length(intersect(x = all.upup, y = CC.upup))
length(intersect(x = all.upup, y = HC.upup))
length(intersect(x = all.downdown, y = DC.downdown))
length(intersect(x = all.downdown, y = CC.downdown))
length(intersect(x = all.downdown, y = HC.downdown))

# Join the single-pattern rows with the Bayes-factor table and export the
# distinct gene names of the matched rows.
# The join key is "Chr:Start" on the file1 side and unitID on the BF side.
file1$id <- paste(file1$Chr, file1$Start, sep = ":")

bf_raw <- read.table(
  "E:/5hmc_file/2_5hmc_yjp_bam/ASM/bayes_p/bias_AShM_BF_no_motif.txt",
  header = TRUE,
  sep = "\t"
)
bf <- data.frame(id = bf_raw$unitID, BF_in_DC = bf_raw$BF_in_DC)

# Inner join: only ids present in both tables survive.
test <- merge(bf, file1, by = "id")

# Gene.refGene may list several genes separated by ";".
gene <- unique(unlist(strsplit(test$Gene.refGene, ";")))

# NOTE(review): the output name says "BF.more.than.3", but no BF threshold is
# applied here - presumably the input table is already filtered; confirm.
write.table(
  gene,
  "./20201117/part.BF.more.than.3.gene.txt",
  quote = FALSE,
  row.names = FALSE,
  sep = "\t"
)
